libname Input "Input Data";
libname Output "Output Data";

proc import datafile="Input Data\BoardEx_ListingMatch.csv"
			dbms=dlm
			out=BX_ListingMatch
			replace;
		delimiter=',';
		getnames=yes;
		guessingrows = 41000;
run;

data Contracts;
	infile 'Input Data\GovContracts_2000_2020.csv' delimiter = ',' MISSOVER DSD lrecl = 32767 firstobs = 2;
	informat contract_transaction_unique_key $200.;
	informat contract_award_unique_key $200.;
	informat award_id_piid $200.;
	informat modification_number $200.;
	informat transaction_number BEST32.;
	informat parent_award_id_piid $200.;
	informat parent_award_modification_number $200.;
	informat federal_action_obligation BEST32.;
	informat action_date YYMMDD10.;
	informat awarding_agency_code $200.;
	informat awarding_agency_name $200.;
	informat awarding_sub_agency_code $200.;
	informat awarding_sub_agency_name $200.;
	informat recipient_duns $200.;
	informat recipient_name $200.;
	informat recipient_parent_duns $200.;
	informat recipient_parent_name $200.;
	informat recipient_address_line_1 $200.;
	informat recipient_address_line_2 $200.;
	informat recipient_city_name $200.;
	informat recipient_county_name $200.;
	informat recipient_state_code $200.;
	informat recipient_zip_4_code $200.;
	informat award_type_code $200.;
	informat award_type $200.;
	informat type_of_contract_pricing_code $200.;
	informat naics_code BEST32.;
	informat number_of_offers_received BEST32.;
	
	format contract_transaction_unique_key $200.;
	format contract_award_unique_key $200.;
	format award_id_piid $20.;
	format modification_number $20.;
	format transaction_number BEST12.;
	format parent_award_id_piid $20.;
	format parent_award_modification_number $20.;
	format federal_action_obligation BEST12.;
	format action_date YYMMDD10.;
	format awarding_agency_code $20.;
	format awarding_agency_name $200.;
	format awarding_sub_agency_code $20.;
	format awarding_sub_agency_name $200.;
	format recipient_duns $20.;
	format recipient_name $200.;
	format recipient_parent_duns $20.;
	format recipient_parent_name $200.;
	format recipient_address_line_1 $200.;
	format recipient_address_line_2 $200.;
	format recipient_city_name $50.;
	format recipient_county_name $50.;
	format recipient_state_code $20.;
	format recipient_zip_4_code $20.;
	format award_type_code $20.;
	format award_type $50.;
	format type_of_contract_pricing_code $20.;
	format naics_code BEST12.;
	format number_of_offers_received BEST12.;

    input
		contract_transaction_unique_key $
		contract_award_unique_key $
		award_id_piid $
		modification_number $
		transaction_number
		parent_award_id_piid $
		parent_award_modification_number $
		federal_action_obligation
		action_date
		awarding_agency_code $
		awarding_agency_name $
		awarding_sub_agency_code $
		awarding_sub_agency_name $
		recipient_duns $
		recipient_name $
		recipient_parent_duns $
		recipient_parent_name $
		recipient_address_line_1 $
		recipient_address_line_2 $
		recipient_city_name $
		recipient_county_name $
		recipient_state_code $
		recipient_zip_4_code $
		award_type_code $
		award_type $
		type_of_contract_pricing_code $
		naics_code
		number_of_offers_received;
run;

/* Clean unique contracts */

data Contracts_Stats;
	set Contracts (keep = award_id_piid parent_award_id_piid awarding_agency_code awarding_sub_agency_code modification_number transaction_number
							federal_action_obligation action_date number_of_offers_received award_type_code type_of_contract_pricing_code);
run;

data Contracts_Comp;
	set Contracts (keep = award_id_piid parent_award_id_piid awarding_sub_agency_code award_type_code recipient_name recipient_parent_name naics_code);
run;

proc delete data = Contracts;

proc sort data = Contracts_Stats; by award_id_piid parent_award_id_piid awarding_agency_code awarding_sub_agency_code award_type_code type_of_contract_pricing_code; run;

proc means data = Contracts_Stats noprint;
	by award_id_piid parent_award_id_piid awarding_agency_code awarding_sub_agency_code;
	var action_date;
	output out = First_Action min = first_action_date;
run;

data Contracts_Stats;
	merge Contracts_Stats First_Action (drop = _:);
	by award_id_piid parent_award_id_piid awarding_agency_code awarding_sub_agency_code;
	Reneg = 0;
	if action_date - first_action_date > 365 then Reneg = 1;
run;

proc delete data = First_Action; run;

proc means data = Contracts_Stats (where = (Reneg = 0)) noprint;
	by award_id_piid parent_award_id_piid awarding_agency_code awarding_sub_agency_code award_type_code type_of_contract_pricing_code;
	var federal_action_obligation;
	output out = Unique_Contracts sum = total_obligation min(action_date) = action_date max(number_of_offers_received) = n_offers;
run;

proc means data = Contracts_Stats (where = (Reneg = 1)) noprint;
	by award_id_piid parent_award_id_piid awarding_agency_code awarding_sub_agency_code;
	var federal_action_obligation;
	output out = Unique_Reneg sum = reneg_obligation max = max_reneg min(action_date) = first_reneg_date;
run;

proc sort data = Contracts_Stats out = Unique_Reneg2 (where = (Reneg = 1));
	by award_id_piid parent_award_id_piid awarding_agency_code awarding_sub_agency_code descending federal_action_obligation action_date;
run;
proc sort data = Unique_Reneg2 nodupkey; by award_id_piid parent_award_id_piid awarding_agency_code awarding_sub_agency_code; run;
proc sort data = Unique_Reneg; by award_id_piid parent_award_id_piid awarding_agency_code awarding_sub_agency_code max_reneg; run;

data Unique_Reneg;
	merge Unique_Reneg (in = a) Unique_Reneg2 (keep = award_id_piid parent_award_id_piid awarding_agency_code
														awarding_sub_agency_code federal_action_obligation action_date
													rename = (federal_action_obligation = max_reneg action_date = reneg_date));
	by award_id_piid parent_award_id_piid awarding_agency_code awarding_sub_agency_code max_reneg;
	if a = 1;
run;

proc delete data = Contracts_Stats; run;

data Unique_Contracts;
	set Unique_Contracts;
	if total_obligation  = . then delete;
	if total_obligation  < 0 then delete;
	if n_offers = 999 then n_offers = .;

	Fixed_Price = 0;
	if type_of_contract_pricing_code = "A" then Fixed_Price = 1;
	if type_of_contract_pricing_code = "B" then Fixed_Price = 1;
	if type_of_contract_pricing_code = "J" then Fixed_Price = 1;
	if type_of_contract_pricing_code = "K" then Fixed_Price = 1;
	if type_of_contract_pricing_code = "L" then Fixed_Price = 1;
	if type_of_contract_pricing_code = "M" then Fixed_Price = 1;

	Cost_Plus = 0;
	if type_of_contract_pricing_code = "R" then Cost_Plus = 1;
	if type_of_contract_pricing_code = "U" then Cost_Plus = 1;
	if type_of_contract_pricing_code = "V" then Cost_Plus = 1;
	if type_of_contract_pricing_code = "Y" then Cost_Plus = 1;

	Incentives = 0;
	if type_of_contract_pricing_code = "L" then Incentives = 1;
	if type_of_contract_pricing_code = "M" then Incentives = 1;
	if type_of_contract_pricing_code = "R" then Incentives = 1;
	if type_of_contract_pricing_code = "U" then Incentives = 1;
	if type_of_contract_pricing_code = "V" then Incentives = 1;
run;

data Unique_Reneg;
	set Unique_Reneg;
	if reneg_obligation  = . then delete;
	if reneg_obligation  <= 0 then delete;
run;

data Unique_Contracts;
	merge Unique_Contracts (in = a) Unique_Reneg (drop = _: in = b);
	by award_id_piid parent_award_id_piid awarding_agency_code awarding_sub_agency_code;
	if a = 1;
	Reneg = 0;
	if b = 1 then Reneg = 1;
	if reneg_obligation = . then reneg_obligation = 0;
run;

proc import datafile="Input Data\Open\Agency_BX_Map.csv"
			dbms=dlm
			out=Agencies
			replace;
		delimiter=';';
		getnames=yes;
		guessingrows = 700;
run;
proc sort data = Agencies (keep = awarding_agency_code awarding_sub_agency_code agcy_bx agencyid_contracts where = (/*agcy_bx ~= . and*/ AgencyID ~= .) rename = (agencyid_contracts = AgencyID)) nodupkey;
	by awarding_agency_code awarding_sub_agency_code AgencyID;
run;

proc sort data = Unique_Contracts; by awarding_agency_code awarding_sub_agency_code; run;
proc sort data = Agencies out = Agencies_Contracts (where = (AgencyID ~= .)) nodupkey; by awarding_agency_code awarding_sub_agency_code; run;

data Unique_Contracts;
	merge Unique_Contracts (in = a) Agencies_Contracts (in = b);
	by awarding_agency_code awarding_sub_agency_code;
	if a = 1 and b = 1;
run;

/* Clean firm info */

data Contracts_Comp;
	set Contracts_Comp;
	COMPANYNAME_GC = recipient_parent_name;
	if COMPANYNAME_GC = "" then COMPANYNAME_GC = recipient_name;	
	if COMPANYNAME_GC = "" then delete;
	Comp_Name = prxchange('s/\(([^\)]+)\)//i', -1, COMPANYNAME_GC);
	Comp_Name = upcase(Comp_Name);
	Comp_Name = tranwrd(Comp_Name,"LLC","");
	Comp_Name = tranwrd(Comp_Name,"INCORPORATED","");
	Comp_Name = tranwrd(Comp_Name,"INC","");
	Comp_Name = tranwrd(Comp_Name,"LTD","");
	Comp_Name = tranwrd(Comp_Name,"CORPORATION","");
	Comp_Name = tranwrd(Comp_Name,"CORPORATI","");
	Comp_Name = tranwrd(Comp_Name,"CORP","");
	Comp_Name = tranwrd(Comp_Name,"COMPANY","");
	Comp_Name = tranwrd(Comp_Name," CO","");
	Comp_Name = tranwrd(Comp_Name,"LP","");
	Comp_Name = tranwrd(Comp_Name,"INTERNATIONAL","");
	Comp_Name = tranwrd(Comp_Name,"INTL","");
	Comp_Name = tranwrd(Comp_Name,"HOLDING","");
	Comp_Name = tranwrd(Comp_Name,"HOLDINGS","");
	Comp_Name = tranwrd(Comp_Name," AND ","");
	Comp_Name = compress(Comp_Name,"ABCDEFGHIJKLMNOPQRSTUVWXYZ012345789","kis");
	Comp_Name = compress(Comp_Name);
run;

proc import datafile="Input Data\CIQ_Contracts_FirmMap.csv"
			dbms=dlm
			out=CIQ_Contracts_Merge
			replace;
		delimiter=',';
		getnames=yes;
run;
proc import datafile="Input Data\BX_CIQ_Merge_NAICS5.csv"
			dbms=dlm
			out=NAICS1
			replace;
		delimiter=',';
		getnames=yes;
		guessingrows = 100000;
run;
proc import datafile="Input Data\CIQ_NAICS.csv"
			dbms=dlm
			out=NAICS2
			replace;
		delimiter=',';
		getnames=yes;
		guessingrows = 100000;
run;

proc sql;
	create table CIQ_Contracts_Merge2 as
		select * from CIQ_Contracts_Merge a, NAICS1 b, NAICS2 (keep = CompanyID) c
			where a.CompanyID = b.CompanyID and b.CompanyID = c.CompanyID;
quit;

proc sql;
	create table Contracts_Comp2 as
		select * from Contracts_Comp a, CIQ_Contracts_Merge2 (drop = CompanyID) b
			where a.Comp_Name = b.Comp_Name;
quit;

proc delete data = CIQ_Contracts_Merge;
proc delete data = CIQ_Contracts_Merge2;
proc delete data = Contracts_Comp;
proc delete data = NAICS1;
proc delete data = NAICS2; run;

proc sort data = Contracts_Comp2 (drop = recipient_name recipient_parent_name naics_code companyname_gc comp_name rename = (BoardID = CompanyID))nodupkey;
	by award_id_piid parent_award_id_piid awarding_sub_agency_code award_type_code CompanyID;
run;




/* Merge and finalize unique_contracts with firm info */

proc sort data = Contracts_Comp2; by award_id_piid parent_award_id_piid awarding_sub_agency_code award_type_code; run;
proc sort data = Unique_Contracts; by award_id_piid parent_award_id_piid awarding_sub_agency_code award_type_code; run;

data Unique_Contracts;
	merge Unique_Contracts (in = a drop = _:) Contracts_Comp2 (in = b);
	by award_id_piid parent_award_id_piid awarding_sub_agency_code award_type_code;
	if a = 1 and b = 1;
	Year = year(action_date);
	Reneg_Year = year(reneg_date);
	First_Reneg_Year = year(first_reneg_date);
run;

proc sort data = Unique_Contracts; by CompanyID; run;
proc sort data = BX_ListingMatch; by CompanyID; run;

data Unique_Contracts;
	merge Unique_Contracts (in = a) BX_ListingMatch (in = b);
	by CompanyID;
	if a = 1;
	if b = 1 then CompanyID = CompanyID_New;
	drop CompanyID_New;
run; 

proc delete data = Contracts_Comp2;
proc delete data = Unique_Reneg; run;





















/* Create BoardEx dataset */

proc sort data = Input.BoardEx_Emp out = Director (keep = DirectorID CompanyID OrgType HOCountryName DateStartRole DateEndRole RoleName); by CompanyID RoleName; run;
proc import datafile="Input Data\Open\Agency_BX_Map.csv"
			dbms=dlm
			out=Agencies
			replace;
		delimiter=';';
		getnames=yes;
		guessingrows = 700;
run;
proc sort data = Agencies (keep = agcy_bx agencyid_contracts where = (CompanyID ~= . and AgencyID ~= .) rename = (agcy_bx = CompanyID agencyid_contracts = AgencyID)) nodupkey;
	by CompanyID AgencyID;
run;
proc sort data = Input.BoardEx_Comp out = Company (keep = BoardID CIKCODE HOADDRESS4 rename = (BoardID = CompanyID HOADDRESS4 = State_Long)) nodupkey; by BoardID; run;

data Company;
	set Company;	
	if State_Long = "Alabama" then State = "AL";
	if State_Long = "Alabama AL" then State = "AL";
	if State_Long = "Alaska AK" then State = "AK";
	if State_Long = "Arizona AZ" then State = "AZ";
	if State_Long = "Arkansas AR" then State = "AR";
	if State_Long = "California CA" then State = "CA";
	if State_Long = "Colorado CO" then State = "CO";
	if State_Long = "Connecticut CT" then State = "CT";
	if State_Long = "Delaware DE" then State = "DE";
	if State_Long = "District Of Columbia DC" then State = "DC";
	if State_Long = "Florida FL" then State = "FL";
	if State_Long = "Georgia GA" then State = "GA";
	if State_Long = "Hawaii HI" then State = "HI";
	if State_Long = "Idaho ID" then State = "ID";
	if State_Long = "Illinois IL" then State = "IL";
	if State_Long = "Indiana IN" then State = "IN";
	if State_Long = "Iowa IA" then State = "IA";
	if State_Long = "Kansas KS" then State = "KS";
	if State_Long = "Kentucky KY" then State = "KY";
	if State_Long = "Louisiana LA" then State = "LA";
	if State_Long = "Maine ME" then State = "ME";
	if State_Long = "Maryland MD" then State = "MD";
	if State_Long = "Massachusetts MA" then State = "MA";
	if State_Long = "Michigan MI" then State = "MI";
	if State_Long = "Minnesota MN" then State = "MN";
	if State_Long = "Mississippi MS" then State = "MS";
	if State_Long = "Missouri MO" then State = "MO";
	if State_Long = "Montana MT" then State = "MT";
	if State_Long = "Nebraska NE" then State = "NE";
	if State_Long = "Nevada NV" then State = "NV";
	if State_Long = "New Hampshire NH" then State = "NH";
	if State_Long = "New Jersey NJ" then State = "NJ";
	if State_Long = "New Mexico NM" then State = "NM";
	if State_Long = "New York NY" then State = "NY";
	if State_Long = "North Carolina NC" then State = "NC";
	if State_Long = "North Dakota ND" then State = "ND";
	if State_Long = "Ohio OH" then State = "OH";
	if State_Long = "Oklahoma OK" then State = "OK";
	if State_Long = "Oregon OR" then State = "OR";
	if State_Long = "Pennsylvania PA" then State = "PA";
	if State_Long = "Rhode Island RI" then State = "RI";
	if State_Long = "South Carolina SC" then State = "SC";
	if State_Long = "South Dakota SD" then State = "SD";
	if State_Long = "Tennessee TN" then State = "TN";
	if State_Long = "Texas TX" then State = "TX";
	if State_Long = "Utah" then State = "UT";
	if State_Long = "Utah UT" then State = "UT";
	if State_Long = "Vermont VT" then State = "VT";
	if State_Long = "Virginia VA" then State = "VA";
	if State_Long = "Washington WA" then State = "WA";
	if State_Long = "West Virginia WV" then State = "WV";
	if State_Long = "Wisconsin WI" then State = "WI";
	if State_Long = "Wyoming WY" then State = "WY";
	drop State_Long;
run;

proc import datafile="Input Data\Open\Plum_Book_2016.csv"
			dbms=dlm
			out=Plum_2016
			replace;
		delimiter=';';
		getnames=yes;
		guessingrows = 9000;
run;
proc import datafile="Input Data\Open\BoardEx_PlumBook_Agencies.csv"
			dbms=dlm
			out=PB_Agencies
			replace;
		delimiter=';';
		getnames=yes;
		guessingrows = 2000;
run;

/* Select set of "powerful" employees */
data Plum_2016;
	set Plum_2016 (keep = Agcy_Name Org_Name Title Type_of_Appt Pay_Plan Pay);
	if Type_of_Appt ~= "PAS" and Type_of_Appt ~= "PA" then delete;
run;

proc sql;
	create table Plum as
		select * from Plum_2016 a, PB_Agencies (where = (CompanyID ~= .)) b
			where a.Agcy_Name = b.Agcy_Name and a.Org_Name = b.Org_Name;
quit;

proc sort data = Plum nodupkey; by CompanyID Title; run;

data Director;
	merge Plum (in = b drop = Agcy_Name Org_Name Pay_Plan Pay rename = (Title = RoleName)) Director (in = a);
	by CompanyID RoleName;
	if a = 1;
	TopRole = 0;
	if b = 1 then TopRole = 1;
	if DateStartRole = .N then delete;
	if DateEndRole = .N then delete;
	if DateEndRole = .C then DateEndRole = mdy(12,31,2019);
	if DateEndRole < DateStartRole then delete;

	/* Dropping director-companies with zero tenure */
	if CompanyID = 33491 and DirectorID = 1879228 then delete;
	if CompanyID = 127647 and DirectorID = 1002662 then delete;
run;

proc sort data = BX_ListingMatch; by CompanyID; run;

data Director;
	merge Director (in = a) BX_ListingMatch (in = b);
	by CompanyID;
	if a = 1;
	if b = 1 then CompanyID = CompanyID_New;
	drop CompanyID_New;
run;

proc sort data = Director; by DirectorID CompanyID DateStartRole; run;

data Director;
	keep DirectorID CompanyID DateStartRole_new DateEndRole HOCOUNTRYNAME ORGTYPE TopRole;
	rename DateStartRole_new = DateStartRole; 
	set Director;
	by DirectorID CompanyID;
	retain DateStartRole_new DateEndRole_last;
	format DateStartRole_new YYMMDDN8.;
	if first.CompanyID then do;
		DateStartRole_new = DateStartRole;
		DateEndRole_last = .;
		TopRole_new = TopRole;
	end;
	else do;
		if year(DateStartRole) - year(DateEndRole_last) > 1 then DateStartRole_new = DateStartRole;
		TopRole_new = TopRole_new + TopRole;
	end;
	DateEndRole_last = DateEndRole;
	if TopRole_new > 0 then TopRole = 1;
run;

proc sort data = Director; by DirectorID CompanyID DateStartRole descending DateEndRole; run;
proc sort data = Director nodupkey; by DirectorID CompanyID DateStartRole; run;

data Director;
	set Director;
	by DirectorID CompanyID;
	at = DateEndRole - DateStartRole;
	retain Tenure;
	if first.CompanyID then do;
		Tenure = at;
	end;
	else do;
		Tenure = Tenure + at;
	end;
	drop at;
run;

proc sql;
	create table Director_Company1 as
		select * from Director a, Agencies b
			where a.CompanyID = b.CompanyID;
quit;

data Director_Company1;
	set Director_Company1;
	Connect = 1;
	Connect_Top = 0;
	if TopRole = 1 then Connect_Top = 1;
run;

proc sort data = Director; by CompanyID; run;

data Director_Company2;
	merge Director (in = a) Agencies (in = b);
	by CompanyID;
	if b ~= 1;
	Connect = 0;
	Connect_Top = 0;
run;

proc append base = Director_Company1 data = Director_Company2; run;

proc sort data = Director_Company1; by CompanyID; run;
proc sort data = Company; by CompanyID; run;

data Director_Company;
	merge Director_Company1 (in = a) Company;
	by CompanyID;
	if a = 1;	
	if HOCountryName ~= "United States" then delete;
run;

data Director_Agency;
	set Director_Company;
	if Connect = 0 then delete;
	rename DateStartRole = AgencyStartDate DateEndRole = AgencyEndDate Tenure = Agency_Tenure;
run;

proc sort data = Director_Company (drop = AgencyID Connect:); by DirectorID CompanyID; run;

proc sql;
	create table Director_Company_Agency as
		select * from Director_Company a, Director_Agency (keep = DirectorID AgencyEndDate AgencyStartDate Agency_Tenure AgencyID Connect Connect_Top) b
			where a.DirectorID = b.DirectorID and a.DateEndRole > b.AgencyStartDate;
quit;

proc sort data = Director_Company_Agency; by CompanyID DirectorID DateEndRole DateStartRole AgencyID descending AgencyEndDate; run;
proc sort data = Director_Company_Agency nodupkey; by CompanyID DirectorID DateEndRole DateStartRole AgencyID; run;

data Year_List;
	do Year = 1990 to 2019;
		output;
	end;
run;

proc import datafile="Input Data\BoardEx_Coverage_20220520.csv"
			dbms=dlm
			out=BX_Coverage
			replace;
		delimiter=';';
		getnames=yes;
		guessingrows = 41000;
run;

proc sort data = BX_Coverage; by Company_ID; run;
proc sort data = BX_ListingMatch; by CompanyID; run;

data BX_Coverage;
	merge BX_Coverage (in = a rename = (Company_ID = CompanyID)) BX_ListingMatch (in = b);
	by CompanyID;
	if a = 1;
	if First_AR_Date ~= "Current" and First_AR_Date ~= "#N/A" then First_Year = input(First_AR_Date, 8.) - 21916;
	if First_AR_Date = "Current" then First_Year = mdy(05,20,2022);
	format First_Year mmddyy10.;
	if b = 1 then CompanyID = CompanyID_New;
run;

proc sort data = BX_Coverage; by CompanyID; run;

proc means data = BX_Coverage noprint;
	by CompanyID;
	var First_Year;
	output out = BX_Coverage min = First_Year;
run;

proc sql;
	create table Director_Company_Agency_Year as
		select * from Director_Company_Agency a, Year_List b, BX_Coverage (keep = CompanyID First_Year) c
			where year(a.DateStartRole) <= b.Year and year(a.DateEndRole) >= b.Year and a.CompanyID = c.CompanyID and b.Year >= year(c.First_Year+1);
quit;

proc sort data = Director_Company_Agency_Year; by CompanyID DirectorID HOCOUNTRYNAME ORGTYPE CIKCODE State Agency: Connect: Year First_Year; run;
proc means data = Director_Company_Agency_Year noprint;
	by CompanyID DirectorID HOCOUNTRYNAME ORGTYPE CIKCODE State Agency: Connect: Year First_Year;
	var Tenure DateEndRole;
	output out = Director_Company_Agency_Year_U min(DateStartRole) = DateStartRole max = Tenure DateEndRole;
run;

data Director_Company_Agency_Year_U;
	set Director_Company_Agency_Year_U;
	
	Company_Years = (DateEndRole - DateStartRole)/365;
	Cool_Off = (DateStartRole - AgencyEndDate)/365;
	Agency_Years = (AgencyEndDate - AgencyStartDate)/365;

	if year(AgencyStartDate) > Year then Connect = 0;
	if year(AgencyStartDate) > Year then Connect_Top = 0;
	if Cool_Off < 0 then Cool_Off = 0;

	if DateStartRole - AgencyStartDate < 0 then Connect = 0;
	
	/* SET CUTOFF FOR COOL OFF PERIOD */
	Connect_DW = Connect;
	if Cool_Off <= 2 then Connect_DW = 0;
	if Cool_Off > 2 then Connect = 0;
	if Cool_Off > 2 then Connect_Top = 0;

	Appointment = 0;
	if year(DateStartRole) = Year and Connect = 1 then Appointment = 1;
	Appointment_DW = 0;
	if year(DateStartRole) = Year and Connect_DW = 1 then Appointment_DW = 1;
	Appointment_Top = 0;
	if year(DateStartRole) = Year and Connect_Top = 1 then Appointment_Top = 1;
	
	Departure = 0;
	if year(DateEndRole) = Year and Connect = 1 then Departure = 1;
	Departure_DW = 0;
	if year(DateEndRole) = Year and Connect_DW = 1 then Departure_DW = 1;
	Departure_Top = 0;
	if year(DateEndRole) = Year and Connect_Top = 1 then Departure_Top = 1;
run;

proc sort data = Director_Company_Agency_Year_U; by CompanyID AgencyID Year DirectorID; run;

proc means data = Director_Company_Agency_Year_U noprint;
	by CompanyID AgencyID Year;
	var Connect Connect_DW Connect_Top;
	output out = Company_Agency_Year sum = Connect_Num Connect_Num_DW Connect_Num_Top
									max(Appointment Appointment_DW Appointment_Top) = Appointment Appointment_DW Appointment_Top
									max(Departure Departure_DW Departure_Top) = Departure Departure_DW Departure_Top;
run;






proc sql;
	create table Director_Company_Year as
		select * from Director_Company a, Year_List b, BX_Coverage (keep = CompanyID First_Year) c
			where year(a.DateStartRole) <= b.Year and year(a.DateEndRole) >= b.Year and a.CompanyID = c.CompanyID and b.Year >= year(c.First_Year+1);
quit;

proc sort data = Director_Company_Year; by CompanyID DirectorID HOCOUNTRYNAME ORGTYPE CIKCODE State Year First_Year; run;
proc means data = Director_Company_Year noprint;
	by CompanyID DirectorID HOCOUNTRYNAME ORGTYPE CIKCODE State Year First_Year;
	var Tenure DateEndRole;
	output out = Director_Company_Year_U min(DateStartRole) = DateStartRole max = Tenure DateEndRole;
run;

data Director_Company_Year_U;
	set Director_Company_Year_U;
	if ORGTYPE ~= "Private" and ORGTYPE ~= "Quoted" and ORGTYPE ~= "Partnership" then delete;
	Public = 0;
	if ORGTYPE = "Quoted" then Public = 1;
	drop ORGTYPE;
run;

proc sort data = Director_Company_Year_U; by CompanyID State Year; run;

proc means data = Director_Company_Year_U noprint;
	by CompanyID State Year;
	var DirectorID;
	output out = Company_Year n = n_People min(Public) = Public;
run;

proc sort data = Agencies out = Agencies_RD nodupkey; by AgencyID; run;

proc sql;
	create table Company_Agency_Year_Full as
		select * from Company_Year (drop = _:) a, Agencies_RD (keep = AgencyID) b;
quit;

proc sort data = Company_Agency_Year; by CompanyID AgencyID Year; run;
proc sort data = Company_Agency_Year_Full; by CompanyID AgencyID Year; run;

data Company_Agency_Year_Full;
	merge Company_Agency_Year (drop = _:) Company_Agency_Year_Full (in = b);
	by CompanyID AgencyID Year;
	if b = 1;
	if Connect_Num = . then Connect_Num = 0;
	if Connect_Num_DW = . then Connect_Num_DW = 0;
	if Connect_Num_Top = . then Connect_Num_Top = 0;
	if Appointment = . then Appointment = 0;
	if Appointment_DW = . then Appointment_DW = 0;
	if Appointment_Top = . then Appointment_Top = 0;
	if Departure = . then Departure = 0;
	if Departure_DW = . then Departure_DW = 0;
	if Departure_Top = . then Departure_Top = 0;
run;

proc import datafile="Input Data\BX_CIQ_Merge_NAICS5.csv"
			dbms=dlm
			out=NAICS1
			replace;
		delimiter=',';
		getnames=yes;
		guessingrows = 100000;
run;
proc import datafile="Input Data\CIQ_NAICS.csv"
			dbms=dlm
			out=NAICS2
			replace;
		delimiter=',';
		getnames=yes;
		guessingrows = 100000;
run;

proc sort data = NAICS1; by CompanyID; run;
proc sort data = NAICS2; by CompanyID; run;

data NAICS;
	merge NAICS1 (in = a) NAICS2 (in = b keep = NAICS_Code CompanyID);
	by CompanyID;
	if a = 1 and b = 1;
	NAICS = NAICS_Code;
	if floor(NAICS_Code/100000) = 0 then NAICS = NAICS_Code*10;
	if floor(NAICS_Code/10000) = 0 then NAICS = NAICS_Code*100;
	if floor(NAICS_Code/1000) = 0 then NAICS = NAICS_Code*1000;
	if floor(NAICS_Code/100) = 0 then NAICS = NAICS_Code*10000;
	if floor(NAICS_Code/10) = 0 then NAICS = NAICS_Code*100000;
	keep BoardID NAICS;
run;

proc sort data = NAICS (rename = (BoardID = CompanyID)) nodupkey; by CompanyID; run;
proc sort data = Company_Agency_Year_Full; by CompanyID Year; run;

data CAY;
	merge Company_Agency_Year_Full (in = a) NAICS (keep = CompanyID NAICS in = b);
	by CompanyID;
	if a = 1 and b = 1;
	Yearm2 = Year - 2;
	Yearm1 = Year - 1;
	Year2 = Year + 1;
	Year3 = Year + 2;
run;












/* Merge in Unique_Contracts */

proc sort data = Unique_Contracts; by CompanyID AgencyID Year; run;
proc sort data = CAY; by CompanyID AgencyID Year; run;

data Reneg_Analysis;
	merge CAY (in = a) Unique_Contracts (in = b);
	by CompanyID AgencyID Year;
	if a = 1 and b = 1;
run;

proc sort data = Reneg_Analysis; by CompanyID AgencyID Reneg_Year; run;

data Reneg_Analysis;
	merge CAY (in = a rename = (Year = Reneg_Year Connect_Num = Reneg_Connect_Num Connect_Num_DW = Reneg_Connect_Num_DW)
						keep = CompanyID AgencyID Year Connect_Num Connect_Num_DW)
			Reneg_Analysis (in = b);
	by CompanyID AgencyID Reneg_Year;
	if b = 1;
	if Reneg_Connect_Num = . then Reneg_Connect_Num = 0;
	if Reneg_Connect_Num_DW = . then Reneg_Connect_Num_DW = 0;
run;

proc sort data = Reneg_Analysis; by CompanyID AgencyID First_Reneg_Year; run;

data Reneg_Analysis;
	merge CAY (in = a rename = (Year = First_Reneg_Year Connect_Num = First_Reneg_Connect_Num Connect_Num_DW = First_Reneg_Connect_Num_DW)
						keep = CompanyID AgencyID Year Connect_Num Connect_Num_DW)
			Reneg_Analysis (in = b);
	by CompanyID AgencyID First_Reneg_Year;
	if b = 1;
	if First_Reneg_Connect_Num = . then First_Reneg_Connect_Num = 0;
	if First_Reneg_Connect_Num_DW = . then First_Reneg_Connect_Num_DW = 0;
run;

proc sort data = Reneg_Analysis; by CompanyID AgencyID Year; run;
proc sort data = CAY; by CompanyID AgencyID Year2; run;

data Reneg_Analysis;
	merge CAY (in = a rename = (Year2 = Year Connect_Num = FConnect_Num Connect_Num_DW = FConnect_Num_DW)
						keep = CompanyID AgencyID Year2 Connect_Num Connect_Num_DW)
			Reneg_Analysis (in = b);
	by CompanyID AgencyID Year;
	if b = 1;
run;

proc sort data = CAY; by CompanyID AgencyID Year3; run;

data Reneg_Analysis;
	merge CAY (in = a rename = (Year3 = Year Connect_Num = F2Connect_Num Connect_Num_DW = F2Connect_Num_DW )
						keep = CompanyID AgencyID Year3 Connect_Num Connect_Num_DW)
			Reneg_Analysis (in = b);
	by CompanyID AgencyID Year;
	if b = 1;
run;

proc sort data = CAY; by CompanyID AgencyID Yearm1; run;

data Reneg_Analysis;
	merge CAY (in = a rename = (Yearm1 = Year Connect_Num = LConnect_Num Connect_Num_DW = LConnect_Num_DW)
						keep = CompanyID AgencyID Yearm1 Connect_Num Connect_Num_DW)
			Reneg_Analysis (in = b);
	by CompanyID AgencyID Year;
	if b = 1;
run;

proc sort data = CAY; by CompanyID AgencyID Yearm2; run;

data Reneg_Analysis;
	merge CAY (in = a rename = (Yearm2 = Year Connect_Num = L2Connect_Num Connect_Num_DW = L2Connect_Num_DW)
						keep = CompanyID AgencyID Yearm2 Connect_Num Connect_Num_DW)
			Reneg_Analysis (in = b);
	by CompanyID AgencyID Year;
	if b = 1;
run;












/* Import RegData by industry */

proc import datafile="Input Data\6-digit.csv"
			dbms=dlm
			out=RegData_Ind6
			replace;
		delimiter=',';
		getnames=yes;
run;

proc import datafile="Input Data\5-digit.csv"
			dbms=dlm
			out=RegData_Ind5
			replace;
		delimiter=',';
		getnames=yes;
run;

data RegData_Ind5;
	set RegData_Ind5;
	label = label*10;
	probability2 = probability;
	drop probability;
run;

proc sort data = RegData_Ind6; by Year Title Part Label; run;
proc sort data = RegData_Ind5; by Year Title Part Label; run;

data RegData_Ind;
	merge RegData_Ind6 RegData_Ind5;
	by Year Title Part Label;
	if probability = . then probability = probability2;
	drop probability2;
run;





/* Import RegData stats */

proc import datafile="Input Data\restrictions.csv"
			dbms=dlm
			out=Restrictions
			replace;
		delimiter=',';
		getnames=yes;
run;

proc sort data = Restrictions out = RegData_Stats (keep = Year Title Part Restrictions); by Year Title Part; run;









/* Import RegData by agencies */

proc import datafile="Input Data\metadata.csv"
			dbms=dlm
			out=Agency_Links
			replace;
		delimiter=',';
		getnames=yes;
		guessingrows = 345000;
run;

proc import datafile="Input Data\Open\Agency_BX_Map.csv"
			dbms=dlm
			out=Agencies
			replace;
		delimiter=';';
		getnames=yes;
		guessingrows = 700;
run;

proc sort data = Agencies (keep = department_id agency_id agencyid_contracts where = (agencyid_contracts ~= . and department_id ~= .)) nodupkey;
	by department_id agency_id agencyid_contracts;
run;

proc sql;
	create table RegData_Agency as
		select * from Agency_Links a, Agencies b
			where a.Agency_ID = b.agency_id and a.Department_ID = b.department_id;
quit;

data RegData_Agency;
	set RegData_Agency;
	AgencyID = agencyid_contracts;
	if Agency_ID = 999999 then delete;
	drop Agency Agency_ID Department Department_ID Key Document_ID agencyid_contracts;
run;






/* Merge everything together */

proc sql;
	create table RegData as
		select * from RegData_Ind a, RegData_Agency b, RegData_Stats c
			where a.Year = b.Year and b.Year = c.Year and a.Title = b.Title and b.Title = c.Title and a.Part = b.Part and b.Part = c.Part;
quit;

data RegData;
	set RegData;
	Restrictions_Eff = Restrictions*Probability;
run;

proc sort data = RegData (rename = (Label = NAICS)); by AgencyID Year NAICS; run;

proc means data = RegData noprint;
	by AgencyID Year NAICS;
	var Restrictions_Eff;
	output out = RegData_Unique sum = Restrictions;
run;

proc sort data = RegData_Unique out = Output.RegData_Run_Contracts; by NAICS AgencyID Year; run;






/* Final merge */

proc sort data = Reneg_Analysis; by NAICS AgencyID Year;
proc sort data = Output.RegData_Run_Contracts; by NAICS AgencyID Year;

data Output.Reneg_Analysis;
	merge Reneg_Analysis (in = a) Output.RegData_Run_Contracts (in = b);
	by NAICS AgencyID Year;
	if a = 1;
run;


proc export data = Output.Reneg_Analysis outfile = "Output Data\reneg_analysis.dta"
	dbms = dta replace;
run;
